/* 
    Purpose: Using the 1950 Census, this file locates black and white men aged 
             30-50 who are fathers of a child younger than 18 in the same household. Other 
             variables necessary to create average predicted father income (in 2b) are also 
             cleaned.

    Notes: Unable to use family income in 1950 because only sample line persons aged 14+ 
           are asked to report family income. Will use personal income instead.

    Creates: Census1950_fathers_ages30to50.dta
*/

clear
set more off
cd "$Mydirectory1/1_DataSources/CensusData/"

* Load the raw 1950 extract (downloaded from IPUMS USA) and list the
* distribution of the person weight
    use ./input/Census1950_1pct_raw.dta, clear
        tabulate perwt

* Stash the full file in a tempfile; it is reloaded once the fathers'
* ids have been built from the child records
    tempfile fulldata
    save `fulldata'

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** IDENTIFY FATHERS
***************************
/* Note: Fathers have to be located BEFORE the sample is restricted to
         sample-line respondents. Sample-line respondents are unlikely to
         be <18 (only ~7% of 1950 respondents are both under 18 and
         designated sample persons), so dropping non-sample-line people
         first would remove most children and, with them, the pointers
         used to identify many Census fathers.
*/
* Children younger than 18, keeping only the household id, the father
* pointer and age
    keep if age<18
    keep serial poploc age

* Exclude children without a father in the house (poploc of 0 or missing)
    drop if inlist(poploc,0,.)

* Count number of kids per father: each remaining row is one child, so
* the group size is the number of kids pointing at that father
    bysort serial poploc: gen kidsperdad = _N
    label var kidsperdad "Number of kids living in Census father's household"

* One row per father (fathers with several children appear several times);
* the pointer becomes the father's own person number for the merge
    by serial poploc: keep if _n==1
    rename poploc pernum
    drop age

    tempfile children
    save `children'

* Keep the sample of fathers: reload the full file and retain only the
* person records that match a father id built from the child records
    use `fulldata', clear
    merge 1:1 serial pernum using `children', keep(match) nogenerate

* Household-size adjusted weight
    /*Note: In 1950 the sample-line person
            weight (slwt) must be used.
    */
    generate wgt1950_hhsizeadj = kidsperdad*slwt
    label variable wgt1950_hhsizeadj "Alternative Census weight; adjusted for # kids in father's household"

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** SET UP INCOME VARIABLES
***************************

* Restrict to sample-line persons
    keep if slrec==2

* Fix income variables: 9999999 is recoded to missing and negative
* values are set to zero
    replace inctot = . if inctot==9999999
    replace inctot = 0 if inctot<0

/* Note: No need to adjust income using the
         CPI, as Jácome et al put income in
         1950 dollars */

* Keep respondents with non-zero and non-missing income
    drop if inlist(inctot,0,.)
    
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** CREATE OTHER NECESSARY VARIABLES
***************************
  
* Keep black and white fathers ages 30 to 50
    keep if inrange(age,30,50) & inlist(race,1,2)

* Inspect state of residence, then rename it to fips
* NOTE(review): fips is not in the final keep list at the end of this
* file, so it is not saved -- confirm that is intended
    tabulate statefip, missing
    rename statefip fips
 
* Region of current residence
*   Collapses the Census region variable into four regions
*   (region_merge) and builds a South indicator (south_merge).
    * Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont, New Jersey, New York, Pennsylvania
    * Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, South Dakota
    /* South: Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina, South Carolina, Virginia, West Virginia, Alabama,
              Kentucky, Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas */
    * West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming, California, Oregon, Washington --note: Census puts AK and HI in with Pacific division
   
    * Any region code not listed below is left as missing in region_merge
    gen region_merge =.
    replace region_merge =1 if (region==11 | region==12) //Northeast
    replace region_merge =2 if (region==21 | region==22) //Midwest
    replace region_merge =3 if inrange(region,31,33) //South
    replace region_merge =4 if (region==41 | region==42) //West
    tab region, m
    tab region_merge, m
    
    label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
    label values region_merge region_l
    tab region_merge, m
    
    * South indicator. A bare "region_merge==3" evaluates to 0 when
    * region_merge is missing, which would silently code unclassified
    * observations as non-South; the if-qualifier leaves them missing.
    gen south_merge = region_merge==3 if !missing(region_merge)
    tab south_merge, m   

* Education variable
*   Collapses detailed education (educd) into five groups (edu):
*     1 = <grade school (includes people with no schooling)
*     2 = 8th grade
*     3 = <hs
*     4 = hs
*     5 = >hs
*   educd==999 is the missing code and is left as edu==.
    tab educd, m

    gen edu=.
    replace edu=1 if educd<=25 //<grade school (includes people with no schooling)
    replace edu=2 if educd==26 //8th grade
    replace edu=3 if inlist(educd,30,40,50,61) //<hs
    replace edu=4 if inlist(educd,60,62,63,64) //hs
    * The upper bound must be tested on educd, the variable that carries the
    * 999 missing code. The earlier "educ<999" referred to a different
    * variable whenever the extract also contains educ, so the guard did not
    * exclude educd==999 and those observations were assigned edu=5.
    replace edu=5 if educd>64 & educd<999 //>hs. "999" = missing
    tab educd edu, m

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
**** ASSIGN COARSENED OCCS
****************

* Count # of Census occupations in 1950 data: flag the first record of
* each occ1950 value and count the flags (bysort also leaves the data
* ordered by occ1950)
    bysort occ1950: gen nvals = (_n==1)
    count if nvals==1

/* Note: Per Census documentation, OCCSCORE is a constructed
         2-digit numeric variable that assigns occupational
         income scores to each occupation. OCCSCORE represents
         the median total income (in hundreds of 1950 dollars)
         of all persons with that particular occupation in 1950.
*/
* Rescale the score from hundreds of dollars to dollars
    replace occscore = 100*occscore

* Separate people with occupations in 200's based on self-employment:
* codes 200-290 with classwkr==1 are shifted up by 1000
    replace occ1950 = occ1950+1000 if inrange(occ1950,200,290) & classwkr==1

* Crosswalk Census occupations to coarsened ANES occupations.
* Every Census record must find its occupation in the crosswalk (the merge
* stops with an error otherwise); crosswalk rows with no Census record are
* discarded.
    merge m:1 occ1950 using ../Crosswalks/Crosswalk_1950Census_toANES.dta, assert(match using) keep(match) nogenerate

* Only occupation 997 (missing or unknown) may lack a coarsened code;
* those records are dropped
    assert occ1950==997 if occ1950ej==. //997 = occupation missing or unknown
    drop if occ1950ej==.
    tabulate occ1950 if occ1950ej==99, missing nolabel
 
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
**** SAVE 
****************

* Keep relevant variables, naming the coarsened occupation as the
* father's occupation
    rename occ1950ej fatheroccej

    keep age race edu region_merge south_merge fatheroccej inctot occscore perwt slwt wgt*

* Flag marking every record as a Census observation
    generate census=1
    label variable census "Census obs"

* Log income, with labels marking both income variables as Census-based
    generate log_father_inctot = log(inctot)
    label variable log_father_inctot "Log inctot, Census"
    label variable inctot "inctot, Census"

* Shrink storage types and write the analysis file
    compress
    save ./output/Census1950_fathers_ages30to50.dta, replace


